#### ---- Setup --------------------------------------------------------- ####
rm(list = ls())

## Uncomment to install marginaleffects package
#remotes::install_github("vincentarelbundock/marginaleffects")

suppressMessages({
  library(tidyverse)
  library(estimatr)
  library(cjoint)
  library(marginaleffects)
  library(coefplot)
  library(ggthemes)
  library(ggforce)
  library(ggbeeswarm)
  library(haven)
  library(lemon)
  library(cowplot)
  library(autumn)
  library(xtable)
  library(grid)
  library(ggrepel)
})


## Municipal survey of Yonkers residents 
cvs_dat <-
  read_rds("municipal_survey.rds")

## National survey of US adults 
lucid_dat <-
  read_rds("national_survey.rds")

## Stacked conjoint dataset (police + residents)
conjoint_dat <-
  read_rds("conjoints_stacked.rds")

## LEMAS/Census dataset used to create Fig. 1
polcom_dat <- read_rds("polcomrep.rds")

## Combined survey data on misperceptions (long format) used to create Fig. 2
long_dat <- read_rds("combined_long_survey.rds")


#### ---- Fig. 1: share of non-White officers v. non-White residents ----- #### 
g <-
  ggplot(
    polcom_dat,
    aes(
      x = share_minority_civ,
      y = share_minority_cop,
      size = tot_civ,
      color = illustrative_cases_dummy,
      fill = illustrative_cases_dummy
    )
  ) +
  geom_point(alpha = 0.8) +
  scale_x_continuous(limits = c(0, 1)) +
  scale_y_continuous(limits = c(0, 1)) +
  geom_abline(
    intercept = 0,
    slope = 1,
    color = "darkgray",
    linetype = "dashed",
    size = 1
  ) +
  theme_minimal() +
  guides(size = "none",
         color = "none",
         fill = "none") +
  xlab("Share of Non-White Residents") +
  ylab("Share of Non-White Officers") +
  scale_fill_colorblind() +
  scale_color_colorblind() +
  geom_label_repel(
    data = subset(polcom_dat, illustrative_cases_dummy == TRUE),
    aes(
      label = illustrative_cases,
      size = NULL,
      color = NULL,
      fill = NULL
    ),
    nudge_y = -0.1,
    nudge_x = 0.1,
    segment.curvature = -1e-20
  )

## Print graph
g

## Export graph to pdf
ggsave(g, filename =  "fig1.pdf", width = 6, height = 4)


#### -------- Related analyses reported in text -------------------------- ####

## Print descriptives for white and non-white shares 
polcom_dat%>%  
  summarise(
    ## White cops
    tot_white_cop = sum(tot_white_cop, na.rm = TRUE), 
    ## Total cops
    tot_cop = sum(tot_cop, na.rm = TRUE),
    ## White officer share        
    pct_white_cop = tot_white_cop/tot_cop,
    ## White civilians
    tot_white_civ = sum(tot_white_civ, na.rm = TRUE),
    ## Total civilians
    tot_civ = sum(tot_civ, na.rm = TRUE),
    ## White civilian share
    pct_white_civ = tot_white_civ/tot_civ)


## Rank order jurisdictions by size of gap between minority population share
## and officer population share and print Yonkers rank
polcom_dat %>%
  mutate(share_diff = share_minority_civ - share_minority_cop,
         pct_rank = percent_rank(share_diff)) %>%
  filter(str_detect(Geo_NAME, "YONKER")) %>%
  pull(pct_rank)



#### ---- Fig. 2: belief accuracy in national and municipal samples ------ #### 
g <- 
  long_dat %>%
  filter(!(group %in% c("Female", "Non-White"))) %>% 
  ## Top code extreme values for plotting
  mutate(diff = case_when(diff > 50 ~ 50,
                          diff < -50 ~ -50,
                          TRUE ~ diff)) %>%
  ggplot(., aes(
    y = diff,
    x = group,
    color = diff,
    fill = diff,
    group = sample
  )) +
  geom_hline(
    yintercept = 0,
    color = "black",
    linetype = "solid",
    size = 0.5
  ) +
  geom_quasirandom(
    groupOnX = TRUE,
    color = "black",
    stroke = 0.05,
    method = "tukeyDense",
    varwidth = TRUE,
    bandwidth = 2,
    pch = 21,
    size = 2.5
  ) +
  #https://colorspace.r-forge.r-project.org/reference/scale_colour_continuous_diverging.html
  colorspace::scale_fill_continuous_diverging("Vik", rev = TRUE, p1 = 0.5,
                                              l2 = 70, l1 = 20) + 
  facet_row(~ sample, space = "free", scale = "free_x") +
  theme_tufte() +
  lemon::coord_flex_cart(
    bottom = lemon::brackets_horizontal(length = unit(0.5, 'cm'))#,
    #left = lemon::capped_vertical(capped = "both")
  ) +
  theme(
    strip.text = element_text(size = 14, color = "black", vjust = -0.25),
    axis.ticks.length.y = unit(0.2, "cm"),
    axis.ticks.y = element_line(size = 0.50, color = "black"),
    axis.line.y = element_line(size = 0.50, color = "black"),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12),
    axis.text.y = element_text(size = 12, color = "black"),
    axis.text.x = element_text(size = 12, colour = "black"),
    plot.margin = unit(c(
      t = -1.5,
      r = 0,
      b = -0.5,
      l = 0.1
    ), "lines"),
    legend.position = "none"
  ) +
  labs(subtitle = "",
       x = "",
       y = "Perceived - actual share of police officers"
  )

g 

## Export plot
ggsave(g, filename = "fig2.pdf", width = 6.5, height = 5)

#### -------- Related analyses reported in text -------------------------- ####

## Estimate differences for each group w/ SEs clustered at respondent level
est_diffs <-
  long_dat %>% 
  group_by(sample, group) %>% 
  do(tidy(lm_robust(
    diff ~ 1, data = .,
    clusters = person_id
  ))) %>% 
  ungroup() %>% 
  mutate_if(is.numeric, round, 2) %>% 
  arrange(desc(sample))

## Print averages reported in manuscript 
est_diffs %>% 
  filter(group %in% c("Female", "Non-White")) %>% 
  select(group, estimate, std.error, statistic, p.value, sample) %>% 
  arrange(desc(sample), group)





#### ---- Fig. 3: estimated average treatment effects -------------------- ####

## NB: code below just relies on regression estimator w/o covariate adjustment 
## unless we have a baseline (pre-treatment) measure of the outcome variable, 
## in which case the baseline measure of the outcome is included for covariate 
## adjustment. For items created using inverse covariance weighting, the _s 
## denotes the standardized version of the index. This is created using 
## Glass delta, which standardizes by dividing DV by control group SD. 
## Estimates can then interpreted in terms of "standard units" or SDs from the
## untreated (control group) mean.

#### -------- Print reliability estimates for scales --------------------- ####

## Trust and confidence in police
cvs_dat %>% 
  filter(Z_diversity_t1 == "control") %>% 
  select(y_trust_ypd_n_t0, y_trust_simple_n_t0) %>% 
  psych::alpha()

## Willingness to cooperate with police
cvs_dat %>% 
  filter(Z_diversity_t1 == "control") %>% 
  select(y_coop_cmt_n_t1, y_coop_reportsusp_n_t1, y_coop_reportcrime_n_t1, 
         y_coop_inform_n_t1) %>% 
  psych::alpha()

## Support for affirmative action
cvs_dat %>% 
  filter(Z_diversity_t1 == "control") %>% 
  select(y_aa_female_n_t1, y_aa_black_n_t1, y_aa_latino_n_t1, 
         y_aa_asian_n_t1) %>% 
  psych::alpha()

lucid_dat %>% 
  filter(Z_diversity == "Control") %>% 
  select(y_aa_female_n, y_aa_black_n, y_aa_latino_n, y_aa_asian_n) %>% 
  psych::alpha()

## Tie-breaking hires
cvs_dat %>% 
  filter(Z_diversity_t1 == "control") %>% 
  select(y_trolley_female_n_t1, y_trolley_black_n_t1, y_trolley_latino_n_t1, 
         y_trolley_asian_n_t1) %>% 
  psych::alpha()

lucid_dat %>% 
  filter(Z_diversity == "Control") %>% 
  select(y_trolley_female_n, y_trolley_black_n, y_trolley_latino_n, 
         y_trolley_asian_n) %>% 
  psych::alpha()

#### -------- Estimation: OLS, municipal sample -------------------------- ####

## Affirmative action hiring (y_aa_idx)
est_aa_idx <-
  lm_robust(y_aa_idx_s_t1 ~ Z_diversity_t1, cvs_dat) %>%
  tidy() %>%
  mutate_if(is.numeric, round, 3)

## Hiring minority applicants (y_trolley_idx) 
est_trolley_idx <- 
  lm_robust(y_trolley_idx_s_t1 ~ Z_diversity_t1,
            data = cvs_dat) %>%
  tidy() %>%
  mutate_if(is.numeric, round, 3)

## Donations to pro-Black organization (y_donate) 
est_donate <- 
  cvs_dat %>%
  lm_robust(y_donate_s_t1 ~ Z_diversity_t1, data = .) %>%
  tidy() %>%
  mutate_if(is.numeric, round, 3)

## Political action (y_vote_dvsty)
est_vote <- 
  cvs_dat %>%
  lm_robust(y_vote_dvsty_s_t1 ~ Z_diversity_t1, data = .) %>%
  tidy() %>%
  mutate_if(is.numeric, round, 3)

## Trust in police (y_trust_idx)
est_trust_idx <- 
  lm_lin(y_trust_idx_s_t1 ~ Z_diversity_t1,
         covariates =  ~ y_trust_idx_t0, data = cvs_dat) %>%
  tidy() %>%
  mutate_if(is.numeric, round, 3) %>% 
  filter(term == "Z_diversity_t1info")

## Willingness to cooperate (y_coop_idx) 
est_coop_idx <- 
  lm_lin(y_coop_idx_s_t1 ~ Z_diversity_t1,
         covariates = ~ y_coop_idx_t0,
         data = cvs_dat) %>%
  tidy() %>%
  mutate_if(is.numeric, round, 3) %>% 
  filter(term == "Z_diversity_t1info")

## Combine
cvs_est_df <- 
  bind_rows(est_aa_idx, est_trolley_idx, est_donate, est_vote, 
            est_trust_idx, est_coop_idx) %>% 
  filter(term != "(Intercept)") %>% 
  mutate(sample = "Municipal sample")

#### -------- Estimation: OLS, national sample --------------------------- ####

## Subset for comparisons between information and no information groups
lucid_subset <- 
  lucid_dat %>% 
  filter(Z_diversity %in% c("Control", "Info"))

## Affirmative action hiring (y_aa_idx)
est_aa_idx <-
  lm_robust(y_aa_idx_s ~ Z_diversity, lucid_subset) %>%
  tidy() %>%
  mutate_if(is.numeric, round, 3)

## Hiring minority applicants (y_trolley_idx) 
est_trolley_idx <-
  lm_robust(y_trolley_idx_s ~ Z_diversity, lucid_subset) %>%
  tidy() %>%
  mutate_if(is.numeric, round, 3) 

lucid_est_df <- 
  bind_rows(est_aa_idx, est_trolley_idx) %>% 
  filter(term != "(Intercept)") %>% 
  mutate(sample = "National sample")

#### -------- Combine OLS estimates and print results -------------------- ####
est_ols <-
  bind_rows(cvs_est_df, lucid_est_df) %>%
  mutate(
    estimator = "OLS",
    estimand = "ATE",
    sample = factor(sample, levels = rev(c(
      "National sample",
      "Municipal sample"
    ))),
    outcome = case_when(
      str_detect(outcome, "y_trolley_idx") ~ "Diversity hiring",
      str_detect(outcome, "y_aa_idx") ~ "Affirmative action",
      outcome == "y_vote_dvsty_s_t1" ~ "Political action",
      outcome == "y_donate_s_t1" ~ "Donations to pro-Black group",
      outcome == "y_trust_idx_s_t1" ~ "Trust in the police",
      outcome == "y_coop_idx_s_t1" ~ "Willingness to cooperate"
    ),
    outcome_lab = factor(
      outcome,
      levels = c(
        "Affirmative action",
        "Diversity hiring",
        "Political action",
        "Donations to pro-Black group",
        "Trust in the police",
        "Willingness to cooperate"
      ),
      labels = c(
        "Support for affirmative action in recruitment and hiring",
        "Support for tie-breaking in favor of minority applicants",
        "Voted for police department diversification",
        "Donation to Black police officer association",
        "Trust and confidence in the police department",
        "Willingness to cooperate with police officers"
      )
    )
  ) %>% 
  select(estimate, std.error, conf.low, conf.high, statistic, p.value, sample, 
         outcome, estimator, estimand, outcome_lab)

est_ols %>% 
  select(outcome, estimate, std.error, statistic, p.value, sample) %>% 
  arrange(outcome, desc(sample)) %>% 
  mutate_if(is.numeric, round, 2)

## Export estimates (NB: code in the SI imports these estimates to create 
## tables, figures)
write_rds(est_ols, "estimated_ates_manuscript.rds")

#### -------- Related analyses reported in text -------------------------- ####
## Control group diffs for hiring outcome: White v. non-Whites:
lucid_dat %>% 
  filter(Z_diversity == "Control") %>% 
  mutate(x_white = as.numeric(x_race == "White")) %>% 
  do(tidy(lm_robust(y_trolley_idx_s ~ x_white, data = .)))

cvs_dat %>% 
  filter(Z_diversity_t1 == "control") %>% 
  mutate(x_white = as.numeric(x_race_cat_t0 == "White")) %>% 
  do(tidy(lm_robust(y_trolley_idx_s_t1 ~ x_white, data = .)))

## Group means for donation outcome
cvs_dat %>% 
  filter(R_t1 == 1) %>% 
  group_by(Z_diversity_t1) %>% 
  summarise(prop_donate = mean(y_donate_binary_n_t1),
            avg_donate = mean(y_donate_n_t1)) %>% 
  mutate_if(is.numeric, round, 2)

## Group means for voting outcome
cvs_dat %>% 
  filter(R_t1 == 1) %>% 
  group_by(Z_diversity_t1) %>% 
  summarise(prop_vote = mean(y_vote_dvsty_t1, na.rm = TRUE)) %>% 
  mutate_if(is.numeric, round, 2)


## Benchmark trust effect against Gallup poll change from 2019 to 2020:
## Trust in police in 2019: 0.53 reported "Quite a lot" or "Great deal" in 2019; 
## down to 0.48 in 2020. A decrease of 5 percentage points. 
## https://news.gallup.com/poll/352304/black-confidence-police-recovers-2020-low.aspx
cvs_dat %>% 
  mutate(trust_binary_t1 = as.numeric(y_trust_ypd_n_t1 > 3),
         trust_binary_t0 = as.numeric(y_trust_ypd_n_t0 > 3)) %>% 
  lm_lin(trust_binary_t1 ~ Z_diversity_t1,
         covariates = ~ trust_binary_t0, 
         data = .) %>% 
  tidy() %>%
  mutate_if(is.numeric, round, 3) %>% 
  filter(term == "Z_diversity_t1info")

#### -------- Generate Fig. 3 and export PDF ----------------------------- ####
g <- 
  est_ols %>%
  filter(estimator == "OLS") %>% 
  ggplot(
    .,
    aes(
      y = sample,
      x = estimate,
      shape = sample,
      group = sample,
      color = sample,
      fill = sample
    )
  ) +
  geom_vline(
    xintercept = 0,
    color = "black",
    linetype = "dashed",
    size = 0.4
  ) +
  geom_errorbarh(
    aes(xmin = conf.low, xmax = conf.high),
    height = 0,
    position = position_dodgev(height = 0.7),
    size = 1.25
  ) +
  geom_point(
    position = position_dodgev(height = 0.7),
    size = 3,
    color = "white"
  ) +
  scale_color_manual("", values = rev(c("#888888", "#000000"))) +
  scale_fill_manual("", values = rev(c("#888888", "#000000"))) +
  scale_shape_manual("", values = rev(c(21, 22))) +
  scale_x_continuous(limits = c(-0.45, 0.45)) + 
  facet_col( ~ outcome_lab, space = "free", scales = "free_y") +
  theme_tufte(base_size = 14) +
  theme(
    axis.ticks.y = element_blank(),
    axis.ticks.length.x = unit(0.2, "cm"),
    axis.ticks.x = element_line(size = 0.4),
    strip.text = element_text(size = 14, hjust = 0),
    axis.line.x = element_line(size = 0.4),
    axis.title.x = element_text(size = 12),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_blank(),
    plot.margin = unit(c(
      t = -1,
      r = 0.5,
      b = 0.1,
      l = -0.5
    ), "lines"),
    legend.position = "none"
  ) +
  labs(subtitle = "",
       x = "Average treatment effect estimate (in standard units)",
       y = "")

p <- 
  g +
  geom_label(
    data = g$data %>%
      filter(outcome == "Affirmative action"),
    aes(label = sample), 
    nudge_x = 0.3,
    color = "white",
    size = 3.5
  )

p

ggsave(p, filename = "fig3.pdf", width = 5, height = 5)

#### ---- Fig. 4: estimated average marginal component effects ----------- #### 

## Plotting params
fixed <- 1
spacing <- .1285

## Setup for cjoint (to adjust for restricted randomization in conjoints) 

## Construct the conjoint design for cjoint
attribute_list <- list()
attribute_list[["sex_"]] <- levels(conjoint_dat$sex_)
attribute_list[["age_"]] <- levels(conjoint_dat$age_)
attribute_list[["race_"]] <- levels(conjoint_dat$race_)
attribute_list[["education_"]] <- levels(conjoint_dat$education_)
attribute_list[["residency_"]] <- levels(conjoint_dat$residency_)
attribute_list[["exam_"]] <- levels(conjoint_dat$exam_)
attribute_list[["motivation_"]] <- levels(conjoint_dat$motivation_)
attribute_list[["occupation_"]] <- levels(conjoint_dat$occupation_)

## Specify randomization constraints on education and occupation attributes. 
## Respondents did not see a school teacher or social worker with GED, 
## High School, or Associates education level
constraint_list <- list()
constraint_list[[1]] <- list()
constraint_list[[1]][["education_"]] <- c("GED", "High school", 
                                          "Associates degree")
constraint_list[[1]][["occupation_"]] <- c("School teacher", "Social worker")

## Pass to makeDesign function
cjoint_design <- 
  makeDesign(type = 'constraints',
             attribute.levels = attribute_list,
             constraints = constraint_list)

#### -------- Estimate AMCEs and print results -------------------------- ####

## Estimate AMCEs  using all attributes in the design, w/ adjustments for 
## conditionally independent attributes (see Hainmueller at al. 2014 S4.1)
est_community <-
  amce(
    conjoint_chosen ~ sex_ + age_ + race_ + education_ +
      residency_ + exam_ + motivation_ + occupation_,
    design = cjoint_design, 
    cluster = TRUE,
    respondent.id = "person_id",
    data = conjoint_dat %>%
      filter(sample == "Civilian sample" ) %>% 
      select(person_id, conjoint_chosen, sex_, age_, race_, education_,
             residency_, exam_, motivation_, occupation_) %>% 
      na.omit()
  )

est_police <-
  amce(
    conjoint_chosen ~ sex_ + age_ + race_ + education_ +
      residency_ + exam_ + motivation_ + occupation_,
    design = cjoint_design, 
    cluster = TRUE,
    respondent.id = "person_id",
    data = conjoint_dat %>%
      filter(sample == "Police sample") %>% 
      select(person_id, conjoint_chosen, sex_, age_, race_, education_,
             residency_, exam_, motivation_, occupation_) %>% 
      na.omit()
  )

## Combine for plotting:
gg_df <-
  bind_rows(
    summary(est_community)$amce %>% mutate(sample = "Civilian sample" ),
    summary(est_police)$amce %>% mutate(sample = "Police sample")
  ) %>%
  set_names(tolower) %>%
  rename(std.error = `std. err`,
         statistic = `z value`,
         p.value = `pr(>|z|)`) %>%
  mutate(
    sample = factor(sample, levels = rev(c(
      "Civilian sample" , "Police sample"
    )),
    labels = rev(c(
      "Civilian sample", "Police sample"
    ))),
    attribute = str_to_title(word(attribute, 1, sep = "_")),
    conf.low = estimate - 1.96 * std.error,
    conf.high = estimate + 1.96 * std.error,
    attribute_ref = factor(
      attribute,
      levels = c(
        "Race",
        "Sex",
        "Exam",
        "Residency",
        "Motivation",
        "Occupation",
        "Education",
        "Age"
      ),
      labels = c(
        "Race/Ethnicity (reference: White)",
        "Sex (reference: Male)",
        "Civil service exam performance (reference: Top 25%)",
        "Length of residency (reference: Does not live in city)",
        "Motivation for application (reference: Job benefits)",
        "Previous occupation (reference: Police officer in another city)",
        "Education (reference: GED)",
        "Age (reference: 23 years old)"
      )
    ),
    level = factor(
      level,
      levels = c(
        "White",
        "Asian",
        "Hispanic/Latino",
        "Black",
        "Top 25%",
        "Top 15%",
        "Top 10%",
        "Top 5%",
        "Top 1%",
        "Does not live in city",
        "For less than 1 year",
        "For 1-2 years",
        "For 3-5 years",
        "For 6-10 years",
        "For more than 10 years",
        "Job benefits",
        "Job security",
        "Career advancement",
        "Friends/relatives in PD",
        "Excitement of the work",
        "To fight crime",
        "Lifelong dream/aspiration",
        "To help people",
        rev(c("Police officer in another city",
              "Military service",
              "Social worker",
              "Security guard",
              "School teacher",
              "Personal trainer",
              "Construction worker",
              "Server/Bartender",
              "Retail salesperson")),
        "High school",
        "Associates degree",
        "Bachelors degree",
        "Graduate degree",
        "Male",
        "Female",
        "23",
        "25",
        "27",
        "29",
        "31",
        "33",
        "35",
        "37"
      ),
      labels = c(
        "White",
        "Asian",
        "Hispanic/Latino",
        "Black",
        "Top 25%",
        "Top 15%",
        "Top 10%",
        "Top 5%",
        "Top 1%",
        "Does not live in city",
        "Less than 1 year",
        "1-2 years",
        "3-5 years",
        "6-10 years",
        "More than 10 years",
        "Job benefits",
        "Job security",
        "Career advancement",
        "Friends/relatives in PD",
        "Excitement of the work",
        "To fight crime",
        "Lifelong dream/aspiration",
        "To help people",
        rev(c("Police officer in another city",
              "Military service",
              "Social worker",
              "Security guard",
              "School teacher",
              "Personal trainer",
              "Construction worker",
              "Server/Bartender",
              "Retail salesperson")),
        "High school",
        "Associates degree",
        "Bachelors degree",
        "Graduate degree",
        "Male",
        "Female",
        "23",
        "25",
        "27",
        "29",
        "31",
        "33",
        "35",
        "37"
      )
    )
  )

plot_df <- 
  gg_df %>%
  filter(attribute %in% c("Race", "Sex", "Residency", "Exam")) %>%
  mutate(
    attribute_ref = factor(
      attribute,
      levels = c("Race",
                 "Sex",
                 "Exam",
                 "Residency"
      ),
      labels = c(
        "Race/Ethnicity (reference: White)",
        "Sex (reference: Male)",
        "Civil service exam performance (reference: Top 25%)",
        "Length of residency (reference: Does not live in city)"
      )
    )) 

## Print results 
plot_df %>% 
  select(attribute, level, estimate, std.error, statistic, p.value, sample) %>% 
  arrange(attribute, level, sample) %>% 
  mutate_if(is.numeric, round, 2) 

#### -------- Generate plot and export PDF ------------------------------- ####
g <- 
  plot_df %>%
  ggplot(.,
         aes(
           x = estimate,
           y = level,
           group = sample,
           color = sample,
           fill = sample,
           shape = sample
         )) +
  geom_vline(
    xintercept = 0,
    color = "black",
    linetype = "dashed",
    size = 0.4
  ) +
  geom_errorbarh(
    aes(xmin = conf.low, xmax = conf.high),
    height = 0,
    position = position_dodgev(height = 0.9),
    size = 1.25
  ) +
  geom_point(
    position = position_dodgev(height = 0.9),
    size = 3,
    color = "white"
  ) +
  facet_col(attribute_ref ~ ., scales = "free_y", space = "free") +
  scale_color_manual("", values = rev(c("#000000", "#0072B2"))) +
  scale_fill_manual("", values = rev(c("#000000", "#0072B2"))) +
  scale_shape_manual("", values = rev(c(22, 21))) +
  labs(x = "Average marginal component effect",
       y = "",
       caption = "") +
  theme_tufte(base_size = 14) +
  theme(
    axis.ticks.y = element_blank(),
    axis.ticks.length.x = unit(0.2, "cm"),
    axis.ticks.x = element_line(size = 0.4),
    strip.text = element_text(size = 14, hjust = 0),
    axis.line.x = element_line(size = 0.4),
    axis.title.x = element_text(size = 12),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12),
    plot.margin = unit(c(
      t = 0,
      r = 0.5,
      b = -1,
      l = 0
    ), "lines"),
    legend.position = "none"
  )

g

p <-
  g +
  theme(legend.position = "none") + 
  geom_label(
    data = g$data %>%
      filter(level == "Black") %>% 
      mutate(estimate = 0.20),
    aes(label = sample),
    hjust = -.05,
    position = position_dodgev(height = 1.5),
    color = "white",
    size = 3.5
  ) + 
  labs(x = "Average marginal component effect") +
  theme(
    axis.title.y = element_blank(),
    strip.text.x = element_text(color = "white", size = 12),
    panel.border = element_rect(fill = NA, colour = "black")
  )

p

pdf("fig4.pdf", width = 5.5, height = 7)
p 
grid::grid.text("Race/Ethnicity (ref: White)", 
                x = unit(0.24, "npc"), y = unit(0.98, "npc"), 
                just = "left",
                gp = gpar(fontsize = 14, col = "black", fontfamily = "serif",
                          fontface = "plain"))
grid::grid.text("Sex (ref: Male)", 
                x = unit(0.24, "npc"), y = unit(0.75, "npc"), 
                just = "left",
                gp = gpar(fontsize = 14, col = "black", fontfamily = "serif",
                          fontface = "plain"))
grid::grid.text("Civil service exam score (ref: Top 25%)",
                x = unit(0.24, "npc"), y = unit(0.635, "npc"), 
                just = "left",
                gp = gpar(fontsize = 14, col = "black", fontfamily = "serif",
                          fontface = "plain"))
grid::grid.text("Length of residency (ref: Does not live in city)", 
                x = unit(0.24, "npc"), y = unit(0.36, "npc"), 
                just = "left",
                gp = gpar(fontsize = 14, col = "black", fontfamily = "serif",
                          fontface = "plain"))
dev.off()

#### ----------- Related analyses reported in text ----------------------- ####
community_dat <- 
  conjoint_dat %>%
  filter(sample == "Civilian sample") %>% 
  filter(!is.na(conjoint_chosen))

police_dat <- 
  conjoint_dat %>%
  filter(sample == "Police sample") %>% 
  filter(!is.na(conjoint_chosen))

mod_community <-
  glm(
    conjoint_chosen ~ sex_ + age_ + race_ + education_ +
      residency_ + exam_ + motivation_ + occupation_,
    family = binomial,
    data = community_dat
  )

mod_police <-
  glm(
    conjoint_chosen ~ sex_ + age_ + race_ + education_ +
      residency_ + exam_ + motivation_ + occupation_,
    family = binomial,
    data = police_dat
  )

## Specify data grids for predictions
new_data_community <- 
  datagrid(
    race_ = c("Black", "White"),
    sex_ = c("Male", "Female"),
    exam_ = c("Top 25%", "Top 10%"),
    age_ = "27",
    education_ = "High school",
    residency_ = "For more than 10 years",
    motivation_ = "To help people",
    occupation_ = "Security guard",
    model = mod_community
  )

new_data_police <- 
  datagrid(
    race_ = c("Black", "White"),
    sex_ = c("Male", "Female"),
    exam_ = c("Top 25%", "Top 10%"),
    age_ = "27",
    education_ = "High school",
    residency_ = "For more than 10 years",
    motivation_ = "To help people",
    occupation_ = "Security guard",
    model = mod_police
  )

## Predictions for community sample
pred_community <-
  predictions(
    mod_community,
    vcov = sandwich::vcovCL(mod_community, cluster = ~ person_id, type = "HC2"),
    newdata = new_data_community
  ) %>%
  select(race_, sex_, exam_, predicted, std.error) %>%
  mutate(sample = "Civilian sample") %>% 
  mutate_if(is.numeric, round, 3)

## Predictions for police sample
pred_police <-
  predictions(
    mod_police,
    vcov = sandwich::vcovCL(mod_police, cluster = ~ person_id, type = "HC2"),
    newdata = new_data_police
  ) %>%
  select(race_, sex_, exam_, predicted, std.error) %>%
  mutate(sample = "Police sample") %>% 
  mutate_if(is.numeric, round, 3)

## Combine estimates
pred_dat <- 
  bind_rows(pred_community, pred_police) %>% 
  mutate(conf.low = predicted - qnorm(0.975)*std.error,
         conf.high = predicted + qnorm(0.975)*std.error,
         sample = factor(sample, levels = c("Civilian sample", "Police sample")))

## Illustrative profile for Black male: 
pred_dat %>% 
  filter(race_ == "Black", sex_ == "Male", exam_ == "Top 25%") %>% 
  mutate_if(is.numeric, round, 2)

## White male
pred_dat %>% 
  filter(race_ == "White", sex_ == "Male", exam_ == "Top 25%") %>% 
  mutate_if(is.numeric, round, 2)

## White female
pred_dat %>% 
  filter(race_ == "White", sex_ == "Female", exam_ == "Top 25%") %>% 
  mutate_if(is.numeric, round, 2)

## Black female
pred_dat %>% 
  filter(race_ == "Black", sex_ == "Female", exam_ == "Top 25%") %>% 
  mutate_if(is.numeric, round, 2)

